

*--------------------------------------------------------------------
* Identify CFDA programs with a temporary positive R&D funding shock.
* Flags qualifying shock years in the CFDA-by-year panel and saves one
* row per CFDA (nshock = number of shocks, year_shock = first shock
* year) to cfda_shocks_positive.
*--------------------------------------------------------------------

*cutoff for shock: minimum year-over-year change in log R&D amount
local cutoff 0.4
*cutoff for previous years (a negative number); it is used directly as the
*lower bound and with flipped sign as the upper bound on pre-shock changes
local cutoff2 -0.3

*all paths use forward slashes so the script runs on every platform
*(Stata on Windows accepts forward slashes as well)
use "$singleaudit/singleaudit_cfda", clear
joinby cfda using "$data/matching/cfda_proportion"

*drop years non-overlapping with IRIS data
*NOTE(review): minyear and maxyear are not created in this script; presumably
*they are supplied by one of the two input files - confirm
drop if year<minyear
drop if year>maxyear
egen cfda_num = group(cfda)
tsset cfda_num year
gen logrd=log(amount_rd)
*year-over-year change in log R&D amount
gen d_logrd=log(amount_rd)-log(L.amount_rd)
by cfda_num: egen mostpositiveshock=max(d_logrd)
by cfda_num: egen mostnegativeshock=min(d_logrd)

*drop CFDA codes with 5 or fewer years of data from 2010 onward
*(the condition is number_year<=5, so a code needs at least 6 such years to stay)
by cfda_num: egen number_year=total(year>=2010)
drop if number_year<=5

*define positive shock: change in log amount above the cutoff and not missing
gen byte temp_pos_shock= (d_logrd>`cutoff' & d_logrd<.)

*require shock to be temporary
*futuremin is the smallest non-missing log amount over the next 1 to 10 years:
*a missing futuremin compares as larger than any number, so it is overwritten
*by the first non-missing lead, and missing leads never replace it
gen futuremin=log(F.amount_rd)
forv i=2/10 {
    replace futuremin=log(F`i'.amount_rd) if log(F`i'.amount_rd)<futuremin
}
*the shock is cleared unless funding later falls back to or below the
*pre-shock level; a missing diff also compares as >0 and clears the shock
gen diff=futuremin - log(L.amount_rd)
replace temp_pos_shock=0 if diff>0

*require no large positive or negative shocks right before the shock
*NOTE(review): a missing one-year-lag change compares as larger than any
*number and therefore clears the shock, while a missing two-year-lag change
*is explicitly allowed - confirm this asymmetry is intended
replace temp_pos_shock=0 if L.d_logrd>-(`cutoff2') | (L2.d_logrd>-(`cutoff2') & L2.d_logrd<.)
replace temp_pos_shock=0 if L.d_logrd<`cutoff2' | L2.d_logrd<`cutoff2'

*first shock year and number of shocks per CFDA
gen temp_year=year if temp_pos_shock==1
by cfda_num: egen year_shock=min(temp_year)
by cfda_num: egen nshock=total(temp_pos_shock)

*drop CFDAs without a qualifying shock whose largest change still exceeds
*the pre-period bound (a missing largest change is dropped as well)
drop if mostpositiveshock>-(`cutoff2') & nshock==0

*one row per CFDA (collapse takes means; both variables are constant within CFDA)
collapse nshock year_shock*, by (cfda)

*drop CFDAs that have a shock recorded in the cfda_shocks file
*NOTE(review): cfda_shocks is presumably the output of a companion script - confirm
ren nshock nshock_positive
merge 1:1 cfda using  "$data/matching/cfda_shocks", keep(1 3) keepus(nshock) nogen
drop if nshock>0 & nshock<.
drop nshock
ren nshock_positive nshock

*keep CFDAs with zero or exactly one positive shock
tab nshock
drop if nshock>1

compress
save "$data/matching/cfda_shocks_positive", replace


*build the employee-level treatment and control groups
use "$data/matching/employee_cfda", clear
*keep only employee-CFDA rows whose CFDA appears in the positive-shock file
merge m:1 cfda using "$data/matching/cfda_shocks_positive", keep(match)

*funding share coming from shocked CFDAs (nshock>0)
gen share_treatment = cond(nshock>0, proportion, .)

*within each person keep only the shocked CFDA row(s) with the largest share;
*rows without a treatment share are always kept
sort iris
by iris: egen maxshare_treatment = max(share_treatment)
keep if share_treatment==. | share_treatment==maxshare_treatment

*funding share coming from CFDAs without a shock (nshock==0)
gen share_control = cond(nshock==0, proportion, .)

*one row per person: total shares, mean shock year and year bounds, max nshock
collapse (sum) share_treatment share_control (mean) year_shock* minyear maxyear (max) nshock, by(iris)

*diagnostic counts of candidate control and treated persons
*NOTE(review): the treated count uses >=0.5 while the treated flag below
*uses >0.5 - confirm which threshold is intended
count if share_treatment==0 & share_control>=0.5
count if year_shock>minyear & share_treatment>=0.5

*treated: more than half of funding from the shocked CFDA, and the shock
*falls after the first year of funding
*control: at least half of funding from unshocked CFDAs and none from shocked ones
gen treated = .
replace treated = 1 if share_treatment>0.5 & year_shock>minyear
replace treated = 0 if share_control>=0.5 & share_treatment==0

*persons in neither group are removed
keep if treated != .
keep iris treated year_shock minyear
compress
save "$data/matching/employee_shocks_positive", replace



***regression
*Prepare the regression sample: attach treatment status to the panel,
*create event-time indicators, and derive the outcome variables.
*Paths use forward slashes so the script runs on every platform.
use "$data/regression_sample", clear
merge m:1 iris using "$data/matching/employee_shocks_positive", keep(3) nogen

*event time relative to the shock year; control persons are assigned tau=-1
gen tau=year-year_shock
replace tau=-1 if treated==0

*one indicator per event year: periodm13 ... periodm1 for tau=-13 ... -1
forv i=13(-1)1 {
    gen byte periodm`i'=(tau==-`i')
}

*period0 ... period15 for tau=0 ... 15
forv i=0/15 {
    gen byte period`i' = (tau==`i')
}

*pooled end-point indicators, stored as byte like the other flags
*NOTE(review): a missing tau compares as larger than any number, so the
*three >-style flags below equal 1 when tau is missing; the regressions
*that use tau^2<50 exclude those rows
gen byte periodm5b=(tau<=-5)
gen byte periodm7b=(tau<=-7)
gen byte period5a=(tau>=5)
gen byte period7a=(tau>=7)
*post is 1 strictly after the shock year (tau==0 counts as pre-period)
gen byte post=(tau>0)

*replace any merged-in minyear by the first year with non-missing
*log_expenditure for each iris
gen temp=year if log_expenditure!=.
cap drop minyear
bysort iris: egen minyear=min(temp)

*number of years with non-missing log_expenditure per emp_num
bysort emp_num: egen nyear=total(log_expenditure!=.)

gen log_expenditure_control=log(total_direct_expenditure+1)
gen log_expenditure_federal=log(1+total_direct_federal)
gen log_expenditure_private=log(1+total_direct_private)

*coarse field categories built from fieldid
gen fieldcat="Science" if ( fieldid==4 | fieldid==12 | fieldid==15 | fieldid==16 | fieldid==5)
replace fieldcat="Engineering" if fieldid==8
replace fieldcat="Medicine" if fieldid==13 | fieldid==2
replace fieldcat="Other" if fieldcat==""

*attach pinumber, carry the last known value forward within emp_num,
*and set the remaining missing values to 0
merge 1:1 iris year using "$data/pi", keep(1 3) nogen
sort emp_num year
replace pinumber=pinumber[_n-1] if emp_num==emp_num[_n-1] & pinumber==.
replace pinumber=0 if pinumber==.

*by citations

gen npatent_highcitation=npatent_quartile3+npatent_quartile4
gen npatent_lowcitation=npatent-npatent_highcitation
gen npatent_loworiginality=npatent-npatent_highoriginality
gen npatent_lowgenerality=npatent-npatent_highgenerality
*any-publication indicator; left missing when npub is missing, because
*npub>0 alone evaluates to 1 for a missing npub
gen pub=(npub>0) if !missing(npub)
gen npub_lowjif=npub-npub_highjif
gen npub_lowcite1=npub-npub_highcite1
gen npub_lowcite2=npub-npub_highcite2

tsset emp_num year

/**************************************
***********REGRESSIONS***************
**************************************/

*outcome variables used in the difference-in-differences regressions
global Y patent npatent npatent_lowcitation npatent_highcitation npatent_lowgenerality npatent_highgenerality pub npub fcites_3yr npub_highcite2 npub_lowcite2 npub_highjif npub_lowjif npub_basic npub_applied log_expenditure_federal log_expenditure_private log_expenditure share_federal share_private

*Diff in diff regressions (Table 2, 4, 5, 7)
*Each outcome is regressed on post with year-by-submit_u-by-fieldid fixed
*effects plus one further fixed effect: first the person (emp_num), then
*the PI (pinumber). Standard errors are clustered by emp_num in both runs.
*The sample is restricted to tau^2<50 (|tau| of at most 7 for integer tau)
*and include==1.
*NOTE(review): include is not created in this script; presumably it comes
*from the regression sample file - confirm
*The person-FE table replaces the output file and the PI-FE table is
*appended to it. The output path uses forward slashes so it works on
*every platform.
local mode replace
foreach fe in emp_num pinumber {
    eststo clear
    foreach var of global Y {
        eststo `var': reghdfe `var' post if tau^2<50 & include==1, a(i.year#i.submit_u#i.fieldid i.`fe') vce(cluster emp_num)
    }
    esttab using "$data/results/pretrend/march2021/diffindiff.csv", `mode' numbers label nogaps r2 ar2 star(* 0.1 ** 0.05 *** 0.01) b(%8.4f) se(%8.4f)
    local mode append
}
